import pandas as pd
import numpy as np
import cv2
import os
import re
import tqdm
from tqdm.auto import tqdm as tqdm
from sklearn.model_selection import KFold
from PIL import Image
import albumentations as albu
from albumentations.pytorch.transforms import ToTensorV2
import torch
from torch.optim.lr_scheduler import StepLR, MultiStepLR
import torchvision
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
from torchvision.models.detection import FasterRCNN
from torchvision.models.detection.rpn import AnchorGenerator
from torch.utils.data import DataLoader , Dataset
from torch.utils.data.sampler import SequentialSampler
from matplotlib import pyplot as plt
DIR_INPUT = '/kaggle/input/global-wheat-detection'
DIR_TRAIN = f'{DIR_INPUT}/train'
DIR_TEST = f'{DIR_INPUT}/test'
def iou_numpy(outputs: np.ndarray, labels: np.ndarray, smooth: float = 1e-6):
    """Per-sample thresholded IoU for binary masks.

    Args:
        outputs: boolean/int mask batch of shape (N, 1, H, W).
        labels: boolean/int mask batch of shape (N, H, W).
        smooth: small constant avoiding 0/0 when both masks are empty.
            (The original referenced an undefined global ``SMOOTH``, which
            raised NameError at call time.)

    Returns:
        Array of shape (N,) with IoU mapped through the competition-style
        step function: 0 below IoU 0.5, then 0.1 increments up to 1.0.
    """
    outputs = outputs.squeeze(1)
    intersection = (outputs & labels).sum((1, 2))
    union = (outputs | labels).sum((1, 2))
    iou = (intersection + smooth) / (union + smooth)
    # Map IoU in [0.5, 1.0] onto {0.1, ..., 1.0}; anything below 0.5 -> 0.
    thresholded = np.ceil(np.clip(20 * (iou - 0.5), 0, 10)) / 10
    return thresholded  # Or thresholded.mean()
def calculate_iou(gt, pr, form='pascal_voc') -> float:
    """Intersection-over-union of two boxes using inclusive pixel coordinates.

    Args:
        gt: ground-truth box, [x1, y1, x2, y2] ('pascal_voc') or [x, y, w, h] ('coco').
        pr: predicted box in the same format.
        form: coordinate convention of the inputs.

    Returns:
        IoU in [0, 1]; 0.0 when the boxes do not overlap.
    """
    if form == 'coco':
        # Convert [x, y, w, h] -> corner format on copies, leaving callers' boxes intact.
        gt = gt.copy()
        pr = pr.copy()
        gt[2] = gt[0] + gt[2]
        gt[3] = gt[1] + gt[3]
        pr[2] = pr[0] + pr[2]
        pr[3] = pr[1] + pr[3]
    # Horizontal overlap span (inclusive coordinates, hence the +1).
    dx = min(gt[2], pr[2]) - max(gt[0], pr[0]) + 1
    if dx < 0:
        return 0.0
    # Vertical overlap span.
    dy = min(gt[3], pr[3]) - max(gt[1], pr[1]) + 1
    if dy < 0:
        return 0.0
    overlap = dx * dy
    gt_area = (gt[2] - gt[0] + 1) * (gt[3] - gt[1] + 1)
    pr_area = (pr[2] - pr[0] + 1) * (pr[3] - pr[1] + 1)
    return overlap / (gt_area + pr_area - overlap)
def find_best_match(gts, pred, pred_idx, threshold=0.5, form='pascal_voc', ious=None):
    """Find the best still-unmatched ground-truth box for one prediction.

    Args:
        gts: ground-truth boxes; a box whose first coordinate is negative is
            treated as already matched and skipped.
        pred: the predicted box.
        pred_idx: column of `pred` in the `ious` cache.
        threshold: minimum IoU to count as a match.
        form: box format forwarded to calculate_iou.
        ious: optional (len(gts), n_preds) cache; -1 marks "not yet computed".

    Returns:
        Tuple (best_gt_index, best_iou); (-1, -inf) when nothing clears the
        threshold.  (The original annotated the return as ``-> int`` although
        it always returns a tuple.)
    """
    best_match_iou = -np.inf
    best_match_idx = -1
    for gt_idx in range(len(gts)):
        if gts[gt_idx][0] < 0:
            # Already matched GT-box
            continue
        # Use the cached IoU when available; recompute and store otherwise.
        iou = -1 if ious is None else ious[gt_idx][pred_idx]
        if iou < 0:
            iou = calculate_iou(gts[gt_idx], pred, form=form)
            if ious is not None:
                ious[gt_idx][pred_idx] = iou
        if iou < threshold:
            continue
        if iou > best_match_iou:
            best_match_iou = iou
            best_match_idx = gt_idx
    return best_match_idx, best_match_iou
def calculate_image_precision(gts, preds, thresholds=(0.5,), form='coco') -> float:
    """Average the per-threshold precision of one image over all thresholds.

    A single IoU cache is shared across thresholds so each (gt, pred) pair
    is scored at most once; `gts` is copied per threshold because matching
    consumes ground-truth boxes in place.
    """
    n_threshold = len(thresholds)
    # -1 marks IoUs that have not been computed yet.
    ious = np.full((len(gts), len(preds)), -1.0)
    image_precision = 0.0
    for thr in thresholds:
        precision_at_threshold = calculate_precision(
            gts.copy(), preds, threshold=thr, form=form, ious=ious)
        image_precision += precision_at_threshold / n_threshold
    return image_precision
def calculate_precision(gts, preds, threshold=0.5, form='coco', ious=None) -> float:
    """Precision tp / (tp + fp + fn) of one image at a single IoU threshold.

    Args:
        gts: numpy array of ground-truth boxes; matched rows are overwritten
            with -1 (the array is mutated — callers pass a copy).
        preds: predicted boxes, expected sorted by descending confidence.
        threshold: IoU required for a match.
        form: box format forwarded to the matcher.
        ious: optional shared IoU cache.

    Returns:
        Precision in [0, 1]; 0.0 when there are neither predictions nor
        ground truths (the original divided by zero in that case).
    """
    tp = 0
    fp = 0
    for pred_idx in range(len(preds)):
        best_match_gt_idx, _ = find_best_match(gts, preds[pred_idx], pred_idx,
                                               threshold=threshold, form=form, ious=ious)
        if best_match_gt_idx >= 0:
            tp += 1
            # Consume the matched GT box so it cannot match a second prediction.
            gts[best_match_gt_idx] = -1
        else:
            fp += 1
    # Unmatched GT boxes (still non-negative) are false negatives.
    fn = (gts.sum(axis=1) > 0).sum()
    denom = tp + fp + fn
    if denom == 0:
        # No predictions and no ground truths: avoid ZeroDivisionError.
        return 0.0
    return tp / denom
def collate_fn(batch):
    """Transpose a list of (image, target, ...) samples into per-field tuples."""
    transposed = zip(*batch)
    return tuple(transposed)
class Averager:
    """Running-mean accumulator for scalar training losses."""

    def __init__(self):
        self.current_total = 0.0
        self.iterations = 0.0

    def send(self, value):
        """Fold one observation into the running total."""
        self.current_total += value
        self.iterations += 1

    @property
    def value(self):
        """Mean of all observations so far; 0 before any were sent."""
        if not self.iterations:
            return 0
        return 1.0 * self.current_total / self.iterations

    def reset(self):
        """Discard all accumulated observations."""
        self.current_total = 0.0
        self.iterations = 0.0
# Ground-truth annotations: one row per bounding box, bbox stored as a string.
BBOXS_DIR = '/kaggle/input/global-wheat-detection/train.csv'
train_df = pd.read_csv(BBOXS_DIR)
# Placeholder columns for the parsed bbox components; -1 marks "not parsed yet".
for _col in ('x', 'y', 'w', 'h'):
    train_df[_col] = -1
def expand_bbox(x):
    """Extract the four numeric tokens from a '[x, y, w, h]' bbox string.

    Returns an array of number strings, or the sentinel [-1, -1, -1, -1]
    when the string contains no numbers.
    """
    tokens = np.array(re.findall("([0-9]+[.]?[0-9]*)", x))
    if len(tokens):
        return tokens
    return [-1, -1, -1, -1]
# Split the bbox string column into four numeric columns and drop the original.
train_df[['x', 'y', 'w', 'h']] = np.stack(train_df['bbox'].apply(lambda x: expand_bbox(x)))
train_df.drop(columns=['bbox'], inplace=True)
# `np.float` was deprecated in NumPy 1.20 and removed in 1.24; the builtin
# float (== float64) is the supported spelling.
for _col in ('x', 'y', 'w', 'h'):
    train_df[_col] = train_df[_col].astype(float)
def get_random_id():
    """Return one training image id chosen uniformly at random."""
    import random
    unique_ids = list(train_df["image_id"].unique())
    random.shuffle(unique_ids)
    return unique_ids[0]
def get_bbox(image_id):
    """Return all [x, y, w, h] boxes annotated for `image_id` as a numpy array."""
    rows = train_df[train_df["image_id"] == image_id]
    return rows[["x", "y", "w", "h"]].values
def get_img(image_id):
    """Load a training image as float32 RGB scaled to [0, 1]."""
    path = os.path.join(DIR_TRAIN, f'{image_id}.jpg')
    bgr = cv2.imread(path)
    # OpenCV loads BGR; matplotlib and the models expect RGB.
    rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB).astype(np.float32)
    return rgb / 255.0
def visualize_bbox(img, bbox, color=(255, 0, 0), thickness=2, **kwargs):
    """Draw one [x, y, w, h] box onto `img` in place and return the image."""
    x_min, y_min, w, h = bbox
    top_left = (int(x_min), int(y_min))
    bottom_right = (int(x_min + w), int(y_min + h))
    cv2.rectangle(img, top_left, bottom_right, color=color, thickness=thickness)
    return img
def augment_and_show(aug, img_id=None):
    """Plot a training image next to its augmented version, boxes drawn on both.

    Args:
        aug: an albumentations transform (or Compose).
        img_id: training image id; a random one is picked when None.
    """
    if img_id is None:
        img_id = get_random_id()
    # get_img already returns RGB in [0, 1]. The original applied BGR2RGB a
    # second time (and a third time before plotting), which swapped the
    # channels back to BGR and rendered the plots with wrong colours.
    img = get_img(img_id)
    boxes = get_bbox(img_id)
    labels = np.ones((boxes.shape[0],))
    augmented = aug(image=img, mask=None, bboxes=boxes, labels=labels)
    img_aug = augmented['image']
    for bbox in boxes:
        img = visualize_bbox(img, bbox)
    for bbox in augmented['bboxes']:
        img_aug = visualize_bbox(img_aug, bbox)
    f, ax = plt.subplots(1, 2, figsize=(16, 8))
    ax[0].imshow(img)
    ax[0].set_title('Original image')
    ax[1].imshow(img_aug)
    ax[1].set_title('Augmented image')
    f.tight_layout()
# Visual sanity checks of the augmentation building blocks on random images.
aug_1 = albu.Compose([albu.RandomGamma(gamma_limit=(120, 180), p=1),
                      albu.RandomContrast((0, 0.8), p=1)])
augment_and_show(aug_1)
aug_2 = albu.Compose([albu.ToGray(p=1)])
augment_and_show(aug_2)
aug_3 = albu.ChannelShuffle(p=1)
augment_and_show(aug_3)
class WheatDataset(Dataset):
    """Training dataset yielding (image, target, image_id) per unique image.

    `dataframe` holds one row per bounding box with columns
    image_id / x / y / w / h; all boxes of an image are grouped into one target.
    """

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        records = self.df[self.df['image_id'] == image_id]
        image = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB).astype(np.float32)
        image /= 255.0
        # Convert [x, y, w, h] to corner format [x1, y1, x2, y2].
        boxes = records[['x', 'y', 'w', 'h']].values
        boxes[:, 2] = boxes[:, 0] + boxes[:, 2]
        boxes[:, 3] = boxes[:, 1] + boxes[:, 3]
        area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0])
        area = torch.as_tensor(area, dtype=torch.float32)
        # There is only one class (wheat); background is implicit class 0.
        labels = torch.ones((records.shape[0],), dtype=torch.int64)
        # Suppose all instances are not crowd.
        iscrowd = torch.zeros((records.shape[0],), dtype=torch.int64)
        target = {}
        # Always a float tensor — the original left a raw numpy array here
        # when no transforms were configured.
        target['boxes'] = torch.as_tensor(boxes, dtype=torch.float32)
        target['labels'] = labels
        target['image_id'] = torch.tensor([index])
        target['area'] = area
        target['iscrowd'] = iscrowd
        if self.transforms:
            sample = self.transforms(image=image, bboxes=target['boxes'], labels=labels)
            image = sample['image']
            if len(sample['bboxes']) > 0:
                target['boxes'] = torch.as_tensor(sample['bboxes'], dtype=torch.float32)
            else:
                # All boxes cropped/filtered away: keep a well-formed empty
                # tensor instead of crashing in torch.stack on an empty list.
                target['boxes'] = torch.zeros((0, 4), dtype=torch.float32)
        return image, target, image_id

    def __len__(self):
        return self.image_ids.shape[0]
#Albumentations
def bbox_params_f():
    """Shared albumentations bbox settings for the wheat boxes."""
    return albu.BboxParams(
        format='pascal_voc',
        min_area=1,
        min_visibility=0.5,
        label_fields=['labels'],
    )
def get_train_transform_512():
    """Training augmentation pipeline producing 512x512 bbox-safe crops."""
    bbox_cfg = {'format': 'pascal_voc', 'label_fields': ['labels']}
    # Photometric jitter applied as a unit 65% of the time.
    color_jitter = albu.Compose(
        [albu.RandomGamma(gamma_limit=(120, 180), p=1),
         albu.RandomContrast((0, 0.8), p=1)],
        bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']},
        p=0.65)
    return albu.Compose(
        [color_jitter,
         albu.OneOf([albu.ToGray(p=1), albu.ChannelShuffle(p=1)], p=0.4),
         albu.Cutout(num_holes=16, max_h_size=40, max_w_size=40, p=0.5),
         albu.Flip(p=0.65),
         albu.RandomSizedBBoxSafeCrop(512, 512, p=1),
         ToTensorV2(p=1.0)],
        bbox_params=bbox_cfg)
def get_train_transform_1024():
    """Training augmentation pipeline keeping the native 1024x1024 size."""
    bbox_cfg = {'format': 'pascal_voc', 'label_fields': ['labels']}
    # Photometric jitter applied as a unit 65% of the time.
    color_jitter = albu.Compose(
        [albu.RandomGamma(gamma_limit=(120, 180), p=1),
         albu.RandomContrast((0, 0.8), p=1)],
        bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']},
        p=0.65)
    return albu.Compose(
        [color_jitter,
         albu.OneOf([albu.ToGray(p=1), albu.ChannelShuffle(p=1)], p=0.4),
         albu.Cutout(num_holes=16, max_h_size=40, max_w_size=40, p=0.5),
         albu.Flip(p=0.65),
         ToTensorV2(p=1.0)],
        bbox_params=bbox_cfg)
def get_valid_transform():
    """Validation pipeline: tensor conversion only, boxes passed through."""
    return albu.Compose(
        [ToTensorV2(p=1.0)],
        bbox_params={'format': 'pascal_voc', 'label_fields': ['labels']})
# ResNet-50 (notebook markdown cell)
def resnet():
    """Faster R-CNN with a COCO-pretrained ResNet-50 FPN backbone.

    The classification head is swapped for a fresh 2-class predictor
    (wheat + implicit background).
    """
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    head_in = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(head_in, 2)
    return model
# VGG16 (notebook markdown cell)
class BoxHead(torch.nn.Module):
    """Box head reusing VGG's fully-connected classifier minus its last layer."""

    def __init__(self, vgg, dropout=False):
        super(BoxHead, self).__init__()
        fc_layers = list(vgg.classifier._modules.values())
        # Drop the final 1000-way classification layer; keep the 4096-d features.
        self.classifier = torch.nn.Sequential(*fc_layers[:-1])

    def forward(self, x):
        flat = x.flatten(start_dim=1)
        return self.classifier(flat)
def vgg_model():
    """Faster R-CNN assembled from a pretrained VGG16 backbone.

    The first 10 backbone layers are frozen; the VGG classifier (minus its
    last layer) becomes the box head feeding a 2-class predictor.
    """
    vgg = torchvision.models.vgg16(pretrained=True)
    backbone = vgg.features[:-1]
    # Freeze the early conv layers so only the deeper features are fine-tuned.
    for layer in backbone[:10]:
        for p in layer.parameters():
            p.requires_grad = False
    backbone.out_channels = 512
    # RPN - Anchor Generator
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512),),
        aspect_ratios=((0.5, 1.0, 1.5),))
    # Head - Box RoI pooling
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=7, sampling_ratio=2)
    # Faster RCNN - Model
    return FasterRCNN(
        backbone=backbone,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
        box_head=BoxHead(vgg),
        box_predictor=FastRCNNPredictor(4096, num_classes=2),
    )
def train(epochs, train_data_loader, valid_data_loader, model, optimizer, model_name, lr_scheduler=None):
    """Train a detection model with per-epoch validation and checkpointing.

    Args:
        epochs: number of epochs to run.
        train_data_loader / valid_data_loader: loaders yielding
            (images, targets, image_ids) batches (see collate_fn).
        model: torchvision-style detector (returns a loss dict in train mode,
            per-image prediction dicts in eval mode).
        optimizer: optimizer over the model's trainable parameters.
        model_name: basename for the checkpoint written each epoch.
        lr_scheduler: optional scheduler stepped once per epoch.

    Returns:
        (train_losses, valid_precision_lst, valid_iou_lst), one entry per epoch.
    """
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model.to(device)
    loss_hist = Averager()
    train_losses, valid_precision_lst, valid_iou_lst = [], [], []
    for epoch in range(epochs):
        itr = 1
        loss_hist.reset()
        model.train()
        for images, targets, image_ids in tqdm(train_data_loader, desc='train'):
            images = list(image.to(device) for image in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            # In train mode the detector returns a dict of component losses.
            loss_dict = model(images, targets)
            losses = sum(loss for loss in loss_dict.values())
            loss_value = losses.item()
            loss_hist.send(loss_value)
            optimizer.zero_grad()
            losses.backward()
            optimizer.step()
            itr += 1
            del images, targets, loss_dict, image_ids
        print(f"Epoch #{epoch+1} loss: {loss_hist.value}")
        train_losses.append(loss_hist.value)
        model.eval()
        with torch.no_grad():
            validation_precisions = []
            validation_iou = []
            # mAP-style thresholds 0.50 .. 0.75 in 0.05 steps.
            iou_thresholds = [x for x in np.arange(0.5, 0.76, 0.05)]
            batch = 0
            for images, targets, image_ids in tqdm(valid_data_loader, desc='validation'):
                batch += 1
                images = list(image.to(device) for image in images)
                targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
                outputs = model(images)
                # calculate batch stats
                for i, image in enumerate(images):
                    iou_single_img = 0
                    boxes = outputs[i]['boxes'].data.cpu().numpy()
                    scores = outputs[i]['scores'].data.cpu().numpy()
                    gt_boxes = targets[i]['boxes'].cpu().numpy()
                    gts = gt_boxes.copy()
                    num_overlaps = 0
                    for pred_idx, pred in enumerate(boxes):
                        best_match_gt_idx, best_match_iou = find_best_match(gts, boxes[pred_idx], pred_idx)
                        if best_match_gt_idx >= 0:
                            # Remove the matched GT box so it cannot match twice.
                            gts[best_match_gt_idx] = -1
                        if best_match_iou >= 0:
                            num_overlaps += 1
                            iou_single_img += best_match_iou
                    # Mean IoU over matched predictions for this image.
                    try:
                        validation_iou.append(iou_single_img / num_overlaps)
                    except ZeroDivisionError:
                        # Narrowed from a bare `except:` — only "no matches" is expected.
                        print(f"zero division : iou {iou_single_img}, overlaps {num_overlaps}")
                    preds_sorted_idx = np.argsort(scores)[::-1]
                    preds_sorted = boxes[preds_sorted_idx]
                    # Fixed: the original passed (preds, gts) — the function takes
                    # ground truths first — and declared form='coco' although both
                    # box sets are in corner (pascal_voc) format.
                    image_precision = calculate_image_precision(gt_boxes, preds_sorted,
                                                                thresholds=iou_thresholds,
                                                                form='pascal_voc')
                    # append precision for each image
                    validation_precisions.append(image_precision)
                print(f"stats for batch {batch} ,iou:{np.mean(validation_iou)} ,precision:{ np.mean(validation_precisions)}")
        # store stats for each epoch
        valid_prec = np.mean(validation_precisions)
        valid_precision_lst.append(valid_prec)
        valid_iou = np.mean(validation_iou)
        valid_iou_lst.append(valid_iou)
        print(f"Epoch #{epoch+1} , precision : {valid_prec} , iou:{valid_iou}")
        # update the learning rate
        if lr_scheduler is not None:
            lr_scheduler.step()
        # Checkpoint after every epoch (same path, so the last epoch wins).
        torch.save(model.state_dict(), f"/kaggle/working/{model_name}.pth")
    return train_losses, valid_precision_lst, valid_iou_lst
result_dict = {}
skf = KFold(n_splits=5, random_state=None, shuffle=True)
image_ids = train_df['image_id'].unique()
j = 2
for i, (train_idx, valid_idx) in enumerate(skf.split(image_ids)):
    # NOTE(review): j is forced to 4 every iteration, so the break below never
    # fires, the 512 transform branch is always taken and batch_size is
    # always 16 — the j machinery is effectively dead code.
    j = 4
    if j > 4:
        break
    model = resnet()
    params = [p for p in model.parameters() if p.requires_grad]
    train_fold = [image_ids[idx] for idx in train_idx]
    train_set = train_df[train_df['image_id'].isin(train_fold)]
    valid_fold = [image_ids[idx] for idx in valid_idx]
    valid_set = train_df[train_df['image_id'].isin(valid_fold)]
    if j % 2 == 0:
        train_dataset = WheatDataset(train_set, DIR_TRAIN, get_train_transform_512())
    else:
        train_dataset = WheatDataset(train_set, DIR_TRAIN, get_train_transform_1024())
    optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
    if j == 4:
        batch_size = 16
    else:
        batch_size = 8
    valid_dataset = WheatDataset(valid_set, DIR_TRAIN, get_valid_transform())
    lr_scheduler = MultiStepLR(optimizer, [3, 8])
    train_data_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn)
    valid_data_loader = DataLoader(
        valid_dataset,
        batch_size=8,
        shuffle=False,
        num_workers=4,
        collate_fn=collate_fn)
    tl, vp, vi = train(15, train_data_loader, valid_data_loader, model, optimizer, f"resnet50Model{i+1}")
    with open("train_stats.txt", "w") as fp:
        fp.write("Train_loss: \n")
        for line in tl:
            fp.write(str(line) + " ")
        fp.write("\n")
        fp.write("Validation precision: \n")
        for line in vp:
            fp.write(str(line) + " ")
        # Fixed: the original wrote the literal string "/n" here (and below),
        # which collapsed lines and broke the line-indexed parser later on.
        fp.write("\n")
        fp.write("Validation iou: \n")
        for line in vi:
            fp.write(str(line) + " ")
        fp.write("\n")
    result_dict[f"{i}"] = [tl, vp, vi]
test_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
# Parse the saved training stats: header lines alternate with value lines.
# Fixed: the original opened the file without ever closing it; the context
# manager guarantees the handle is released.
with open("../input/resnetmodels/resnet_train_stats.txt", 'r') as f:
    lines = f.readlines()
train_loss = [float(val) for val in lines[1].split(" ")[:-1]]
valid_precision = [float(val) for val in lines[3].split(" ")[:-1]]
valid_iou = [float(val) for val in lines[5].split(" ")[:-1]]
plt.figure(figsize=[15, 15])
plt.subplot(1, 3, 1)
plt.title("train loss")
plt.plot(train_loss)
plt.subplot(1, 3, 2)
plt.title("validation precision")
plt.plot(valid_precision)
plt.subplot(1, 3, 3)
plt.title("validation iou")
plt.plot(valid_iou)
plt.show()
class WheatTestDataset(Dataset):
    """Inference dataset yielding (image, image_id) pairs — no targets."""

    def __init__(self, dataframe, image_dir, transforms=None):
        super().__init__()
        self.image_ids = dataframe['image_id'].unique()
        self.df = dataframe
        self.image_dir = image_dir
        self.transforms = transforms

    def __getitem__(self, index: int):
        image_id = self.image_ids[index]
        # Load as float32 RGB scaled to [0, 1].
        raw = cv2.imread(f'{self.image_dir}/{image_id}.jpg', cv2.IMREAD_COLOR)
        image = cv2.cvtColor(raw, cv2.COLOR_BGR2RGB).astype(np.float32) / 255.0
        if self.transforms:
            image = self.transforms(image=image)['image']
        return image, image_id

    def __len__(self) -> int:
        return self.image_ids.shape[0]
def get_test_transform():
    """Inference pipeline: tensor conversion only (no bbox params needed)."""
    return albu.Compose([ToTensorV2(p=1.0)])
# VGG16 model (notebook markdown cell)
class BoxHead(torch.nn.Module):
    """VGG fully-connected head without the final classification layer.

    NOTE(review): duplicates the BoxHead defined earlier in this file.
    """

    def __init__(self, vgg, dropout=False):
        super(BoxHead, self).__init__()
        kept = list(vgg.classifier._modules.values())[:-1]
        self.classifier = torch.nn.Sequential(*kept)

    def forward(self, x):
        return self.classifier(x.flatten(start_dim=1))
def load_vgg16_net(checkpoint_path, device=None):
    """Rebuild the VGG16 Faster R-CNN and load weights from a checkpoint.

    Args:
        checkpoint_path: path to a saved state_dict.
        device: target device; defaults to CUDA when available. (The original
            signature rejected the second positional argument its call sites
            pass, and hard-coded ``.cuda()``, crashing on CPU-only machines.)

    Returns:
        The model on `device`, in eval mode.
    """
    if device is None:
        device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    vgg = torchvision.models.vgg16(pretrained=False)
    backbone = vgg.features[:-1]
    # Freeze the early conv layers (mirrors the training-time setup).
    for layer in backbone[:10]:
        for p in layer.parameters():
            p.requires_grad = False
    backbone.out_channels = 512
    box_head = BoxHead(vgg)
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),), aspect_ratios=((0.5, 1.0, 1.5),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'], output_size=7, sampling_ratio=2)
    vgg_model = FasterRCNN(
        backbone=backbone,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
        box_head=box_head,
        box_predictor=FastRCNNPredictor(4096, num_classes=2)
    )
    # map_location lets a GPU-trained checkpoint load on any machine.
    vgg_model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    vgg_model = vgg_model.to(device)
    vgg_model.eval()
    return vgg_model
# ResNet-50 model (notebook markdown cell)
def load_resnet(checkpoint_path, device):
    """Rebuild the 2-class ResNet-50 Faster R-CNN and load weights onto `device`.

    Args:
        checkpoint_path: path to a saved state_dict.
        device: torch.device the model (and checkpoint tensors) are mapped to.

    Returns:
        The model on `device`, in eval mode.
    """
    resnet = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=False, pretrained_backbone=False)
    num_classes = 2
    in_features = resnet.roi_heads.box_predictor.cls_score.in_features
    resnet.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    # map_location lets a GPU-trained checkpoint load on CPU-only machines
    # (the original torch.load failed there).
    resnet.load_state_dict(torch.load(checkpoint_path, map_location=device))
    resnet = resnet.to(device)
    resnet.eval()
    return resnet
test_df = pd.read_csv(f'{DIR_INPUT}/sample_submission.csv')
test_dataset = WheatTestDataset(test_df, DIR_TEST, get_test_transform())
# No shuffling: keep the submission order deterministic.
test_data_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    drop_last=False,
    collate_fn=collate_fn,
)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
# WBF and ensemble methods (notebook markdown cell)
# !pip install ensemble-boxes   <- shell magic; run in the notebook environment
from ensemble_boxes import *
def make_ensemble_predictions(images, models):
    """Run every ensemble member on the same batch.

    Returns one prediction list (one dict per image) per model, in the order
    the models were given. Uses the module-level `device`.
    """
    batch = [image.to(device) for image in images]
    return [net(batch) for net in models]
def run_wbf(predictions, image_index, image_size=512, iou_thr=0.55, skip_box_thr=0.7, weights=None):
    """Fuse per-model predictions for one image with Weighted Boxes Fusion.

    Args:
        predictions: list (one entry per model) of per-image prediction dicts.
        image_index: which image of the batch to fuse.
        image_size: edge length used to normalise boxes to [0, 1] for WBF.
        iou_thr: IoU threshold for clustering boxes.
        skip_box_thr: minimum score for a box to participate.
        weights: optional per-model weights. Fixed: the original hard-coded
            ``weights=None`` in the fusion call, silently ignoring this
            parameter.

    Returns:
        (boxes, scores, labels) with boxes scaled back to pixel coordinates.
    """
    boxes = [prediction[image_index]['boxes'].data.cpu().numpy() / (image_size - 1) for prediction in predictions]
    scores = [prediction[image_index]['scores'].data.cpu().numpy() for prediction in predictions]
    labels = [np.ones(prediction[image_index]['scores'].shape[0]) for prediction in predictions]
    boxes, scores, labels = weighted_boxes_fusion(boxes, scores, labels, weights=weights,
                                                  iou_thr=iou_thr, skip_box_thr=skip_box_thr)
    boxes = boxes * (image_size - 1)
    return boxes, scores, labels
# Five ResNet-50 fold checkpoints for the ensemble.
renet_models = [
    load_resnet(f'../input/resnetmodels/resnet50Model{k}.pth', device)
    for k in (1, 2, 3, 4, 5)
]
# VGG16 checkpoints (note the irregular numbering of the saved files).
vgg_models = [
    load_vgg16_net(f'../input/vggmodels/vgg16Model{k}.pth', device)
    for k in (1, 12, 13, 14, 15)
]
import matplotlib.pyplot as plt
# Visualise the WBF-fused ensemble predictions for one sample of the first batch.
for j, (images, image_ids) in enumerate(test_data_loader):
    if j > 0:
        break
    predictions = make_ensemble_predictions(images, renet_models)
    i = 1
    sample = images[i].permute(1, 2, 0).cpu().numpy()
    boxes, scores, labels = run_wbf(predictions, image_index=i)
    boxes = boxes.astype(np.int32).clip(min=0, max=511)
    fig, ax = plt.subplots(1, 1, figsize=(16, 8))
    for box in boxes:
        cv2.rectangle(sample, (box[0], box[1]), (box[2], box[3]), (220, 0, 0), 2)
    ax.set_axis_off()
    ax.imshow(sample)
def format_prediction_string(boxes, scores):
    """Serialise one image's detections as 'score x y w h ...' for submission."""
    parts = [
        "{0:.4f} {1} {2} {3} {4}".format(score, box[0], box[1], box[2], box[3])
        for score, box in zip(scores, boxes)
    ]
    return " ".join(parts)
# Build the submission: fuse the ensemble per image and serialise boxes.
results = []
for images, image_ids in test_data_loader:
    predictions = make_ensemble_predictions(images, renet_models)
    for i, image in enumerate(images):
        boxes, scores, labels = run_wbf(predictions, image_index=i)
        # Scale from the 512 WBF space back to the 1024-px originals.
        boxes = (boxes * 2).astype(np.int32).clip(min=0, max=1023)
        # Submission format expects [x, y, w, h], not corner coordinates.
        boxes[:, 2] = boxes[:, 2] - boxes[:, 0]
        boxes[:, 3] = boxes[:, 3] - boxes[:, 1]
        results.append({
            'image_id': image_ids[i],
            'PredictionString': format_prediction_string(boxes, scores),
        })
test_df = pd.DataFrame(results, columns=['image_id', 'PredictionString'])
test_df.to_csv('submission.csv', index=False)